mapTheTimer() | Uses mmap() to map the cycle counter into the address space. Returns the unit-value of the timer in picoseconds; for example returns 21000 in a Challenge where the timer unit value is 21 nanoseconds. |
timerBitCount() | Returns the number of bits of precision in the timer, which varies with the CPU board type, either 32 or 64 bits. |
readTimer32() | Returns the least-significant (or only) word of the timer value. |
readTimer64() | Returns the timer value as a 64-bit unsigned integer (extended with 0-bits when necessary). |
main() | Compiled only when the macro UNIT_TEST is defined; contains code to exercise the preceding functions. |
Example A-2 : Functions to Map and Read the Cycle Counter
/***************************************************************************** || || The functions in this module provide access to the free-running timer || on the CPU board of certain SGI systems. || || timerBitCount() || || Returns the number of bits of data in the timer, as reported || by syssgi(SGI_CYCLECNTR_SIZE): || 0 error reported by syssgi -- probably no timer in this machine || 32 in an Indy or Crimson || 64 in a Challenge, Onyx, and other big machines. || || mapTheTimer() || || This function tests the hardware environment. If the current system has || a timer, the function tries to map it into memory. Errors can include: || * 0 returned by timerBitCount() || * error returned by syssgi(SGI_QUERY_CYCLECNTR) || * error returned by mmap(2) || When there is no error, the function returns a positive integer which is || the number of picoseconds represented by one unit increment of the timer. || In the event of an error, the function returns 0, and errno is set to || some error code. || mapTheTimer() can be called multiple times without harm. To convert || its returned value to a fraction of a second, convert to double and || multiply by 1e-12. || || readTimer32() || || This function calls mapTheTimer(), if it has not been called already. || Thus the first attempt to read the clock will map it if necessary. || If the timer has been mapped, its least-significant bits are returned || as an unsigned 32-bit integer. || * if mapTheTimer() failed, the returned value is always 0 || * if the timer has 32-bit precision, the returned value is || the whole timer value || * if the timer has 64-bit precision (e.g. Challenge), the returned || value is the low-order word. || || readTimer64() || || This function is like readTimer32(), except that it returns an unsigned || 64-bit integer. 
|| * if mapTheTimer() failed, the returned value is always 0 || * if the timer has 32-bit precision, the returned value is || the whole timer value, extended with high-order 0-bits || * if the timer has 64-bit precision, the returned value is the whole || timer value. The 64-bit timer is sampled in such as way as to || compensate for rollover while minimizing bus traffic. || || main() || || Compiled only when UNIT_TEST is defined, provides a functional test || platform for the above functions. || || NOTE: in two of these routines we assume that this machine is operating || in big-endian mode, such that the least-significant 32 bits of a || long-long are at the higher word address. || *****************************************************************************/ #include <stddef.h> /* for NULL */ #include <fcntl.h> /* for O_RDONLY and open() */ #include <unistd.h> /* for getpagesize() */ #include <sys/mman.h> /* for constants used with mmap() */ #include <sgidefs.h> /* for __psint_t, __uint*_t, and ABI defs */ #include <sys/syssgi.h> /* for syssgi(), SGI_QUERY_CYCLECNTR */ #include <errno.h> /* for errno global */ /***************************************************************************** || The following globals are set up by mapTheTimer() the first time called. || timerMapAddress == NULL means mapTheTimer() has never been called || == -1 means mapTheTimer() called and failed || else it points to the timer in memory || The data type (void *) is coerced to __uint32_t or __uint64_t in use. || || The "volatile" declaration keeps the compiler from optimizing away || successive references to it. || || timerPicoSecs == 0 means the timer has not been mapped successfully || else is the value returned by syssgi(QUERY_CYCLECOUNTER) || || timerPrecision == value returned by syssgi(SGI_CYCLECNTR_SIZE), || but as this value is needed in the timer-reading || functions, it is cached, so as to avoid a system call || every time we read the clock. 
|| || If this code was redone in C++ (not a bad idea, feel free) these would || be class variables. *****************************************************************************/ #define TIMER_IS_MAPPED (0 != timerPicoSecs) #define TIMER_MAP_ATTEMPTED (NULL != timerMapAddress) static volatile void * timerMapAddress = NULL; static unsigned int timerPicoSecs = 0; static unsigned int timerPrecision = 0; unsigned int mapTheTimer() { __uint32_t timerUnits = 0; /* receives timer picosecond unit value */ __psint_t timerPhysAddr; /* receives timer absolute address */ __psint_t timerPhysVPN; /* timerPhysAddr masked to a page boundary */ __psint_t addrMask; /* page offset bit mask */ int fdMem; /* file descriptor for /dev/mmem */ if ( ! TIMER_MAP_ATTEMPTED) /* first time through this code */ { /* || Get the physical address of the clock in full. If there || is no cycle counter on this machine, syssgi returns -1. */ timerPhysAddr = syssgi(SGI_QUERY_CYCLECNTR, &timerUnits); if ((__psint_t)-1 != timerPhysAddr) /* we have a timer */ { /* || Trim out the offset from the address leaving the || page number part of the address. (VPN == virtual page number) */ addrMask = getpagesize() - 1; timerPhysVPN = timerPhysAddr & ~addrMask; /* || Map the page containing the clock's address into the virtual || address space of this process. */ fdMem = open("/dev/mmem", O_RDONLY); timerMapAddress = (void *) mmap( NULL, /* addr = 0, don't care it goes */ addrMask, /* len = pagesize - 1 */ PROT_READ, /* prot = read-only */ MAP_PRIVATE, /* changes are unshared (n.a.) */ fdMem, /* map base is physical memory */ (off_t)timerPhysVPN /* source address to map */ ); if ((__psint_t)-1 != (__psint_t)timerMapAddress) { /* || mmap() succeeded, cache info in global variables. */ timerPicoSecs = timerUnits; timerPrecision = syssgi(SGI_CYCLECNTR_SIZE); /* || Restore any nonzero offset bits to mapped page address. 
*/ timerMapAddress = (void*) ( ((__psint_t)timerMapAddress) /* addr as int */ | (timerPhysAddr & addrMask) /* plus offset bits */ ); } else ; /* mmap() failed, timerMapAddress == -1, errno set */ } /* end syssgi() successful */ else { timerMapAddress = (void *)-1; /* syssgi error, no timer (?) */ } } /* end attempting to initialize */ return timerPicoSecs; } unsigned int timerBitCount() { if (TIMER_IS_MAPPED) return timerPrecision; if ( ! TIMER_MAP_ATTEMPTED) { mapTheTimer(); return timerPrecision; } else return 0; } /***************************************************************************** || || In both of the following routines, one goal is to minimize the number of || references to the mapped timer. Reason: each such reference is an || uncached memory reference plus a bus access, taking at least 1 usec and || possibly more depending on the machine. Unnecessary references to the || timer should be avoided when possible. || || If the timer has 64 bits, return its least-significant word. Which word || is that? This code assumes the big-endian model. An alternative || would be to load the long-long value and force C to convert it. That is || be portable but would hit the bus twice instead of once, nullifying the || speed advantage that this routine has over the one following. || *****************************************************************************/ __uint32_t readTimer32() { __uint32_t ret = 0; if ( ! TIMER_IS_MAPPED ) mapTheTimer(); if ( TIMER_IS_MAPPED ) /* timer mapped ok */ { if (64 == timerPrecision) ret = ((__uint32_t *)timerMapAddress)[1]; /* low word of 2 */ else /* in IRIX 6.2, 32 bits is the only alternative */ ret = *((__uint32_t *)timerMapAddress); } return ret; } /***************************************************************************** || || When the timer has 32 bits, just fake up a long-long and return it. 
|| For long timers we must ask: was this code compiled to an ABI that does || atomic loads of long-longs (-64 or -n32), or not (-32)? || In the newer ABIs, we just fetch the 64-bit timer in one move. || || When compiled under a 32-bit system, the generated code loads the timer || value in two "lw" instructions. The low word of the timer overflows into || the high word about every 90 seconds, and if that happens between the || lw's, the result will be wrong. Worse, we cannot be certain which of the || two words the compiler will choose to load first, the low or the high. || || In order to minimize the number of uncached accesses, we test for || overflow only when it has recently happened; that is, when || the most significant 9 bits of the low word are all-0. This || condition defines a window of 0.17 seconds following the overflow || (21e-12 * 2^23 == .176160768). || If this were kernel code, the window could be much smaller. In enabled || code we have to allow for a series of interrupts between the load of the || upper and lower words. As it is, if we load the upper word just before || overflow, and an interrupt delays the next fetch 0.17+ seconds, we will || return an incorrect value. || *****************************************************************************/ __uint64_t readTimer64() { union { struct { __uint32_t msw,lsw; }w; __uint64_t ll; } ret; ret.ll = 0; if ( ! TIMER_IS_MAPPED ) mapTheTimer(); if ( TIMER_IS_MAPPED ) /* it mapped ok */ { if (timerPrecision == 32) { ret.w.msw = 0; ret.w.lsw = *((__uint32_t *)timerMapAddress); } else { #if (_MIPS_SIM == _MIPS_SIM_NABI32 || _MIPS_SIM == _MIPS_SIM_ABI64) /* 64-bit loads are atomic */ ret.ll = *(__uint64_t *)timerMapAddress; #else /* 64-bit loads are not atomic */ ret.w.msw = ((__uint32_t *)timerMapAddress)[0]; ret.w.lsw = ((__uint32_t *)timerMapAddress)[1]; if ( (ret.w.lsw & 0xff800000) == 0) { /* || The high word incremented not more than .17 sec ago. 
|| Provided there is not a delay here exceeding 89.8 sec, || the following single load ensures we have the high word || value that is correctly associated with the low word || we already picked up. */ ret.w.msw = ((__uint32_t *)timerMapAddress)[0]; } #endif } } return ret.ll; } #ifdef UNIT_TEST #include <stdio.h> int main(int argc, char*argv[]) { int j; int numTix = 10; unsigned int picosecs; unsigned short tbits; double dmicsecs; if (argc>1) numTix = atoi(argv[1]); if ( picosecs = mapTheTimer() ) { tbits = timerBitCount(); dmicsecs = ((double)picosecs)/1e6; printf("The timer has %d bits of precision\n",tbits); printf("One timer unit == %d picoseconds or %g us\n", picosecs, dmicsecs); } else { perror("mapTheTimer"); return errno; } { __uint32_t st1, st2, stx; st1 = readTimer32(); printf("\nreading timer as 32 bits\n\n"); for(j=0; j<numTix; ++j) { st2 = readTimer32(); stx = st2 - st1; printf("0x%0x - 0x%0x = 0x%0x (%g usecs)\n", st2, st1, stx, (stx*dmicsecs) ); st1 = st2; } } { __uint64_t lt1, lt2, ltx; lt1 = readTimer64(); printf("\nreading timer as 64 bits\n\n"); for(j=0; j<numTix; ++j) { lt2 = readTimer64(); ltx = lt2 - lt1; printf("0x%0llx - 0x%0llx = 0x%0llx (%g usecs)\n", lt2, lt1, ltx, (ltx * dmicsecs)); lt1 = lt2; } } } #endif